2  Tables

This document explains how to reproduce the tables presented in the paper.

3 Install Packages

We install the following packages using the groundhog package manager to increase computational reproducibility.

options(repos = c(CRAN = "https://cran.r-project.org")) 


if (!requireNamespace("groundhog", quietly = TRUE)) {
    install.packages("groundhog")
}

pkgs <- c("magrittr", "data.table", "stringr", "lubridate", "knitr", 
          "sandwich", "lmtest",
          "sjPlot", "stargazer", "gt")

groundhog::groundhog.library(pkg = pkgs,
                             date = "2024-08-01")

rm(pkgs)

3.1 Read Data

# data <- data.table::fread(file = "../data/processed/full.csv")
data <- readRDS(file="../data/processed/full.Rda")

3.2 Table 2

This table was created in a manual process, the corresponding summary statistics can be queried using the following code, where surprise == FALSE selects the confirmation treatment arm:

data[communication == "point" & surprise == FALSE,      # choose condition
     .(mean = round(mean(b, na.rm = TRUE), digits = 2), # choose variable
       sd = round(sd(b, na.rm = TRUE), digits = 2)),    # choose variable
     by = stage] %>% 
  kable()
stage mean sd
1 -0.05 0.27
2 -0.09 0.31

The number of observations by treatment can be queried using the following code:

data[, 
     .(N = length(unique(participant.label))), 
     by = c("communication", "surprise")][order(surprise, communication)] %>%
  kable()
communication surprise N
point FALSE 255
both FALSE 247
interval FALSE 243
point TRUE 252
both TRUE 250
interval TRUE 258
# alternatively:
# data[stage == 2, 
#      .N, 
#      by = c("communication", "surprise")][order(surprise, communication)]
# Melt the data
long_df <- melt(data,
                id.vars = c("stage", "surprise", "communication"),
                measure.vars = c("b", "a", "E1", "E2", "E3", "E12", "E13", "E23"),
                variable.name = "Variable",
                value.name = "Value")

# Function to calculate mean and sd
calculate_stats <- function(x) {
  c(mean = mean(x, na.rm = TRUE), sd = sd(x, na.rm = TRUE))
}

# Calculate pooled summary
pooled_summary <- long_df[, as.list(calculate_stats(Value)), by = .(Variable, stage)]
setnames(pooled_summary, c("mean", "sd"), c("mean_pooled", "sd_pooled"))

# Calculate summary by treatment
summary_tmp <- long_df[, as.list(calculate_stats(Value)), 
                       by = .(surprise, communication, Variable, stage)]

# Reshape summary_tmp to wide format
summary_wide <- dcast(summary_tmp, 
                      Variable + stage ~ surprise + communication, 
                      value.var = c("mean", "sd"))

# Merge pooled summary with reshaped summary
summary_table_wide <- merge(x = pooled_summary,
                            y = summary_wide, 
                            by = c("Variable", "stage"))

# Function to format mean and sd without line break
format_mean_sd <- function(mean, sd) {
  sprintf("%.2f (%.2f)", mean, sd)
}

# Apply formatting
cols_to_format <- names(summary_table_wide)[3:length(names(summary_table_wide))]
summary_table_wide[, (cols_to_format) := lapply(.SD, as.numeric), .SDcols = cols_to_format]
summary_table_wide[, c("Pooled", "Confirmation_point", "Confirmation_both", "Confirmation_interval", 
                       "Contradiction_point", "Contradiction_both", "Contradiction_interval") := 
                     .(format_mean_sd(mean_pooled, sd_pooled),
                       format_mean_sd(mean_FALSE_point, sd_FALSE_point),
                       format_mean_sd(mean_FALSE_both, sd_FALSE_both),
                       format_mean_sd(mean_FALSE_interval, sd_FALSE_interval),
                       format_mean_sd(mean_TRUE_point, sd_TRUE_point),
                       format_mean_sd(mean_TRUE_both, sd_TRUE_both),
                       format_mean_sd(mean_TRUE_interval, sd_TRUE_interval))]

# Select only the formatted columns
summary_table_final <- summary_table_wide[, .(Variable, stage, Pooled, 
                                              Confirmation_point, Confirmation_both, Confirmation_interval,
                                              Contradiction_point, Contradiction_both, Contradiction_interval)]

# Create gt table
summary_table_final %>% 
  gt(groupname_col = "Variable") %>% 
  cols_label(
    stage = "Stage",
    Pooled = "Pooled",
    Confirmation_point = "Point",
    Confirmation_both = "Point + interval",
    Confirmation_interval = "Interval",
    Contradiction_point = "Point",
    Contradiction_both = "Point + interval",
    Contradiction_interval = "Interval"
  ) %>%
  tab_spanner(
    label = "Confirmation",
    columns = c(Confirmation_point, Confirmation_both, Confirmation_interval)
  ) %>%
  tab_spanner(
    label = "Contradiction",
    columns = c(Contradiction_point, Contradiction_both, Contradiction_interval)
  ) %>%
  cols_align(
    align = "left",
    columns = c(Variable, stage)
  ) %>%
  cols_align(
    align = "center",
    columns = c(Pooled, Confirmation_point, Confirmation_both, Confirmation_interval,
                Contradiction_point, Contradiction_both, Contradiction_interval)
  ) %>%
  opt_row_striping() %>%
  tab_options(
    table.font.size = px(12),
    data_row.padding = px(4)
  )
Stage Pooled Confirmation Contradiction
Point Point + interval Interval Point Point + interval Interval
b
1 -0.07 (0.30) -0.05 (0.27) -0.09 (0.30) -0.07 (0.29) -0.04 (0.31) -0.08 (0.31) -0.08 (0.31)
2 -0.08 (0.32) -0.09 (0.31) -0.09 (0.32) -0.08 (0.32) -0.04 (0.31) -0.10 (0.34) -0.07 (0.34)
a
1 0.72 (0.49) 0.71 (0.50) 0.78 (0.47) 0.73 (0.51) 0.71 (0.49) 0.68 (0.44) 0.69 (0.51)
2 0.71 (0.54) 0.73 (0.54) 0.73 (0.51) 0.73 (0.56) 0.69 (0.54) 0.70 (0.53) 0.69 (0.56)
E1
1 47.41 (20.96) 46.65 (19.05) 49.08 (20.32) 47.98 (21.74) 45.16 (20.53) 46.65 (21.02) 48.93 (22.84)
2 50.19 (25.46) 47.61 (25.00) 46.58 (25.10) 46.35 (24.46) 52.63 (24.79) 56.24 (26.81) 51.57 (25.24)
E2
1 50.06 (20.80) 50.11 (19.95) 51.67 (20.49) 50.28 (21.62) 48.61 (20.08) 51.05 (20.78) 48.70 (21.84)
2 50.75 (23.38) 54.34 (22.15) 55.01 (23.46) 53.28 (24.48) 45.51 (21.29) 48.63 (24.14) 47.89 (23.23)
E3
1 48.43 (20.34) 46.28 (19.22) 51.09 (20.01) 48.48 (20.58) 47.51 (21.53) 48.47 (19.36) 48.83 (21.11)
2 46.70 (24.49) 48.44 (22.04) 49.31 (23.08) 48.47 (23.55) 42.09 (25.44) 45.72 (26.34) 46.27 (25.70)
E12
1 58.13 (20.00) 57.13 (19.64) 58.80 (19.76) 59.20 (19.23) 57.57 (20.24) 58.02 (21.17) 58.11 (20.00)
2 61.94 (24.17) 60.22 (22.64) 61.48 (21.98) 58.80 (23.39) 62.03 (26.61) 66.30 (25.07) 62.72 (24.54)
E13
1 55.60 (20.11) 55.41 (19.15) 55.89 (20.00) 54.06 (21.15) 52.73 (20.07) 57.07 (19.93) 58.31 (20.09)
2 55.93 (23.45) 54.08 (23.73) 52.53 (24.57) 54.12 (23.32) 56.94 (21.92) 59.55 (24.11) 58.19 (22.41)
E23
1 60.42 (22.37) 59.32 (23.18) 59.63 (22.24) 60.04 (22.48) 59.58 (23.51) 62.71 (21.38) 61.24 (21.38)
2 58.37 (24.38) 62.63 (24.44) 63.52 (23.92) 62.12 (23.02) 52.01 (24.71) 54.40 (24.54) 55.77 (23.29)

3.3 Table 3

ols_3_1 <- lm(formula = b ~ age_35_52 + age_53_plus + female + high_education + high_income + 
    married + parentship, 
            data = data, 
            subset = (treated == FALSE))
se_3_1  <- coeftest(ols_3_1, vcov = vcovHC(ols_3_1, type = "HC1"))

ols_3_2 <- lm(formula = b ~ age_35_52 + age_53_plus + female + high_education + high_income + 
    married + parentship + high_temperature + high_usage + high_general_risk + 
    high_weather_risk + high_accuracy + high_credibility, 
            data = data, 
            subset = (treated == FALSE))
se_3_2  <- coeftest(ols_3_2, vcov = vcovHC(ols_3_2, type = "HC1"))

ols_3_3 <- lm(formula = a ~ age_35_52 + age_53_plus + female + high_education + high_income + 
    married + parentship, 
            data = data, 
            subset = (treated == FALSE))
se_3_3  <- coeftest(ols_3_3, vcov = vcovHC(ols_3_3, type = "HC1"))

ols_3_4 <- lm(formula = a ~ age_35_52 + age_53_plus + female + high_education + high_income + 
    married + parentship + high_temperature + high_usage + high_general_risk + 
    high_weather_risk + high_accuracy + high_credibility, 
            data = data, 
            subset = (treated == FALSE))
se_3_4  <- coeftest(ols_3_4, vcov = vcovHC(ols_3_4, type = "HC1"))
se_3 <- list(se_3_1[,2], se_3_2[,2], se_3_3[,2], se_3_4[,2])
p_3  <- list(se_3_1[,4], se_3_2[,4], se_3_3[,4], se_3_4[,4])

stargazer(ols_3_1, ols_3_2, ols_3_3, ols_3_4, 
          align = TRUE, 
          se = se_3, 
          p = p_3,   
          title = "Linear regressions: Explanatory analysis of Ambiguity Indices b and a",
          covariate.labels = c("age(35-52)",
                               "age(53-69)",
                               "gender (female)",
                               "high education",
                               "high income",
                               "family (married or same sex union)",
                               "parentship",
                               "high temperature (median)",
                               "high weather forecast usage (median)",
                               "high general risk attitude (median)",
                               "high weather risk attitude (median)",
                               "high accuracy (median)",
                               "high credibility (median)",
                               "Constant"), 
          font.size = "scriptsize",
          type = "html", 
          df = FALSE,
          style = "qje")
Linear regressions: Explanatory analysis of Ambiguity Indices b and a
b a
(1) (2) (3) (4)
age(35-52) -0.004 -0.001 0.065* 0.050
(0.021) (0.021) (0.037) (0.036)
age(53-69) -0.047** -0.043* 0.149*** 0.139***
(0.023) (0.023) (0.038) (0.037)
gender (female) -0.064*** -0.055*** 0.037 0.040
(0.017) (0.017) (0.027) (0.027)
high education -0.071*** -0.072*** 0.022 0.025
(0.018) (0.018) (0.028) (0.028)
high income 0.024 0.019 0.018 0.027
(0.017) (0.017) (0.030) (0.031)
family (married or same sex union) -0.019 -0.017 -0.056* -0.059*
(0.019) (0.019) (0.032) (0.033)
parentship 0.020 0.016 -0.054* -0.052*
(0.020) (0.020) (0.030) (0.030)
high temperature (median) -0.022 -0.074***
(0.016) (0.027)
high weather forecast usage (median) -0.016 -0.008
(0.016) (0.026)
high general risk attitude (median) 0.028 -0.010
(0.018) (0.027)
high weather risk attitude (median) 0.025 -0.028
(0.018) (0.028)
high accuracy (median) -0.025 -0.010
(0.022) (0.035)
high credibility (median) 0.007 -0.046
(0.017) (0.029)
Constant 0.013 0.017 0.656*** 0.759***
(0.023) (0.034) (0.039) (0.053)
N 1,361 1,361 1,361 1,361
R2 0.028 0.036 0.015 0.024
Adjusted R2 0.023 0.026 0.009 0.015
Residual Std. Error 0.289 0.289 0.480 0.479
F Statistic 5.550*** 3.835*** 2.845*** 2.542***
Notes: ***Significant at the 1 percent level.
**Significant at the 5 percent level.
*Significant at the 10 percent level.

3.4 Table 4

ols_4_1 <- lm(formula = b ~ surprise + treated + surprise*treated, 
            data = data)
se_4_1  <- coeftest(x = ols_4_1, 
                    vcov = vcovCL(ols_4_1,
                                  cluster = ~data$participant.label,
                                  type = "HC1"))

ols_4_2 <- lm(formula = b ~ communication + treated + communication*treated, 
            data = data,
            subset = (surprise == FALSE))
se_4_2  <- coeftest(x = ols_4_2, 
                    vcov = vcovCL(ols_4_2,
                                  cluster = data[surprise == FALSE, participant.label],
                                  type = "HC1"))

ols_4_3 <- lm(formula = b ~ communication + treated + communication*treated, 
            data = data,
            subset = (surprise == TRUE))
se_4_3  <- coeftest(x = ols_4_3, 
                    vcov = vcovCL(ols_4_3,
                                  cluster = data[surprise == TRUE, participant.label],
                                  type = "HC1"))

ols_4_4 <- lm(formula = b ~ surprise + treated + surprise*treated, 
            data = data,
            subset = (communication == "point"))
se_4_4  <- coeftest(x = ols_4_4, 
                    vcov = vcovCL(ols_4_4,
                                  cluster = data[communication == "point", participant.label],
                                  type = "HC1"))

ols_4_5 <- lm(formula = b ~ surprise + treated + surprise*treated, 
            data = data,
            subset = (communication == "interval"))
se_4_5  <- coeftest(x = ols_4_5, 
                    vcov = vcovCL(ols_4_5,
                                  cluster = data[communication == "interval", participant.label],
                                  type = "HC1"))

ols_4_6 <- lm(formula = b ~ surprise + treated + surprise*treated, 
            data = data,
            subset = (communication == "both"))
se_4_6  <- coeftest(x = ols_4_6, 
                    vcov = vcovCL(ols_4_6,
                                  cluster = data[communication == "both", participant.label],
                                  type = "HC1"))
se_4 <- list(se_4_1[,2], se_4_2[,2], se_4_3[,2], se_4_4[,2], se_4_5[,2], se_4_6[,2])
p_4  <- list(se_4_1[,4], se_4_2[,4], se_4_3[,4], se_4_4[,4], se_4_5[,4], se_4_6[,4])

stargazer(ols_4_1, ols_4_2, ols_4_3, ols_4_4, ols_4_5, ols_4_6, 
          align = TRUE, 
          se = se_4, 
          p = p_4,   
          title = "Linear regressions: Treatment effects on ambiguity index b",
          dep.var.caption = "Dependent variable: b",
          dep.var.labels = " ",
          model.names = FALSE,
          column.labels = c("full", "confirmation", "contradiction", "point", "interval", "both"),
          covariate.labels = c("contradiction", "both", "interval", "stage 2", "contradiction x stage 2", "interval x stage 2", "both x stage 2", "Constant"),
          font.size = "scriptsize",
          type = "html", 
          df = FALSE)
Linear regressions: Treatment effects on ambiguity index b
Dependent variable: b
full confirmation contradiction point interval both
(1) (2) (3) (4) (5) (6)
contradiction 0.002 0.012 -0.014 0.007
(0.015) (0.026) (0.027) (0.027)
both -0.038 -0.043
(0.025) (0.027)
interval -0.017 -0.043
(0.025) (0.027)
stage 2 -0.020** -0.041*** -0.0002 -0.041*** -0.010 -0.008
(0.008) (0.014) (0.014) (0.014) (0.014) (0.015)
contradiction x stage 2 0.014 0.041** 0.016 -0.015
(0.012) (0.020) (0.020) (0.020)
interval x stage 2 0.034 -0.023
(0.021) (0.020)
both x stage 2 0.031 0.006
(0.020) (0.020)
Constant -0.068*** -0.050*** -0.037* -0.050*** -0.067*** -0.087***
(0.010) (0.017) (0.019) (0.017) (0.019) (0.019)
Observations 3,010 1,490 1,520 1,014 1,002 994
R2 0.001 0.003 0.006 0.005 0.0003 0.001
Adjusted R2 -0.0002 -0.001 0.002 0.003 -0.003 -0.002
Residual Std. Error 0.310 0.300 0.320 0.299 0.317 0.315
F Statistic 0.770 0.819 1.694 1.851 0.083 0.244
Note: p<0.1; p<0.05; p<0.01

3.5 Table 5

ols_5_1 <- lm(formula = a ~ surprise + treated + surprise*treated,
              data = data)
se_5_1  <- coeftest(x = ols_5_1, 
                    vcov = vcovCL(ols_5_1,
                                  cluster = ~data$participant.label,
                                  type = "HC1"))

ols_5_2 <- lm(formula = a ~ communication + treated + communication*treated,
              data = data,
              subset = (surprise == FALSE))
se_5_2  <- coeftest(x = ols_5_2, 
                    vcov = vcovCL(ols_5_2,
                                  cluster = data[surprise == FALSE, participant.label],
                                  type = "HC1"))

ols_5_3 <- lm(formula = a ~ communication + treated + communication*treated,
              data = data,
              subset = (surprise == TRUE))
se_5_3  <- coeftest(x = ols_5_3, 
                    vcov = vcovCL(ols_5_3,
                                  cluster = data[surprise == TRUE, participant.label],
                                  type = "HC1"))

ols_5_4 <- lm(formula = a ~ surprise + treated + surprise*treated, 
              data = data,
              subset = (communication == "point"))
se_5_4  <- coeftest(x = ols_5_4, 
                    vcov = vcovCL(ols_5_4,
                                  cluster = data[communication == "point", participant.label],
                                  type = "HC1"))

ols_5_5 <- lm(formula = a ~ surprise + treated + surprise*treated, 
              data = data,
              subset = (communication == "interval"))
se_5_5  <- coeftest(x = ols_5_5, 
                    vcov = vcovCL(ols_5_5,
                                  cluster = data[communication == "interval", participant.label],
                                  type = "HC1"))

ols_5_6 <- lm(formula = a ~ surprise + treated + surprise*treated, 
              data = data,
              subset = (communication == "both"))
se_5_6  <- coeftest(x = ols_5_6, 
                    vcov = vcovCL(ols_5_6,
                                  cluster = data[communication == "both", participant.label],
                                  type = "HC1"))
se_5 <- list(se_5_1[,2], se_5_2[,2], se_5_3[,2], se_5_4[,2], se_5_5[,2], se_5_6[,2])
p_5  <- list(se_5_1[,4], se_5_2[,4], se_5_3[,4], se_5_4[,4], se_5_5[,4], se_5_6[,4])

stargazer(ols_5_1, ols_5_2, ols_5_3, ols_5_4, ols_5_5, ols_5_6, 
          align = TRUE, 
          se = se_5, 
          p = p_5,   
          title = "Linear regressions: Treatment effects on ambiguity index a",
          dep.var.caption = "Dependent variable: a",
          dep.var.labels = " ",
          model.names = FALSE,
          column.labels = c("full", "confirmation", "contradiction", "point", "interval", "both"),
          covariate.labels = c("contradiction", "both", "interval", "stage 2", "contradiction x stage 2", "interval x stage 2", "both x stage 2", "Constant"),
          font.size = "scriptsize",
          type = "html", 
          df = FALSE)
Linear regressions: Treatment effects on ambiguity index a
Dependent variable: a
full confirmation contradiction point interval both
(1) (2) (3) (4) (5) (6)
contradiction -0.045* 0.002 -0.046 -0.092**
(0.025) (0.044) (0.045) (0.041)
both 0.063 -0.030
(0.043) (0.041)
interval 0.023 -0.026
(0.045) (0.044)
stage 2 -0.007 0.023 -0.022 0.023 -0.004 -0.041
(0.020) (0.036) (0.033) (0.036) (0.033) (0.033)
contradiction x stage 2 0.007 -0.044 0.006 0.061
(0.027) (0.049) (0.046) (0.046)
interval x stage 2 -0.064 0.042
(0.049) (0.046)
both x stage 2 -0.027 0.024
(0.049) (0.046)
Constant 0.740*** 0.712*** 0.714*** 0.712*** 0.734*** 0.775***
(0.018) (0.031) (0.031) (0.031) (0.033) (0.030)
Observations 3,010 1,490 1,520 1,014 1,002 994
R2 0.002 0.001 0.0004 0.001 0.002 0.005
Adjusted R2 0.001 -0.002 -0.003 -0.002 -0.001 0.002
Residual Std. Error 0.513 0.516 0.512 0.515 0.535 0.490
F Statistic 1.647 0.405 0.123 0.285 0.549 1.639
Note: p<0.1; p<0.05; p<0.01

3.6 Table 6

ols_6_1 <- lm(formula = ed ~ surprise, 
            data = data,
            subset = (stage == 2))
se_6_1  <- coeftest(ols_6_1, vcov = vcovHC(ols_6_1, type = "HC1"))

ols_6_2 <- lm(formula = ed ~ communication, 
            data = data,
            subset = (stage == 2 & surprise == FALSE))
se_6_2  <- coeftest(ols_6_2, vcov = vcovHC(ols_6_2, type = "HC1"))

ols_6_3 <- lm(formula = ed ~ communication, 
            data = data,
            subset = (stage == 2 & surprise == TRUE))
se_6_3  <- coeftest(ols_6_3, vcov = vcovHC(ols_6_3, type = "HC1"))

ols_6_4 <- lm(formula = ed ~ surprise, 
            data = data,
            subset = (stage == 2 & communication == "point"))
se_6_4  <- coeftest(ols_6_4, vcov = vcovHC(ols_6_4, type = "HC1"))

ols_6_5 <- lm(formula = ed ~ surprise, 
            data = data,
            subset = (stage == 2 & communication == "interval"))
se_6_5  <- coeftest(ols_6_5, vcov = vcovHC(ols_6_5, type = "HC1"))

ols_6_6 <- lm(formula = ed ~ surprise, 
            data = data,
            subset = (stage == 2 & communication == "both"))
se_6_6  <- coeftest(ols_6_6, vcov = vcovHC(ols_6_6, type = "HC1"))
se_6 <- list(se_6_1[,2], se_6_2[,2], se_6_3[,2], se_6_4[,2], se_6_5[,2], se_6_6[,2])
p_6  <- list(se_6_1[,4], se_6_2[,4], se_6_3[,4], se_6_4[,4], se_6_5[,4], se_6_6[,4])

stargazer(ols_6_1, ols_6_2, ols_6_3, ols_6_4, ols_6_5, ols_6_6, 
          align = TRUE, 
          se = se_6,
          p = p_6,   
          title = "Linear regressions: Treatment effects on Euclidian distance between vector of matching probabilities in stage 1 vs. stage 2",
          dep.var.caption = "Dependent variable: Euclidian distance stage 2 vs. stage 1",
          dep.var.labels = " ",
          model.names = FALSE,
          column.labels = c("full", "confirmation", "contradiction", "point", "interval", "both"),
          covariate.labels = c("contradiction", "both", "interval", "Constant"),
          font.size = "scriptsize",
          type = "html", 
          df = FALSE)
Linear regressions: Treatment effects on Euclidian distance between vector of matching probabilities in stage 1 vs. stage 2
Dependent variable: Euclidian distance stage 2 vs. stage 1
full confirmation contradiction point interval both
(1) (2) (3) (4) (5) (6)
contradiction 6.824*** 10.120*** 2.423 8.023**
(1.775) (3.029) (3.033) (3.158)
both 1.195 -0.902
(2.991) (3.194)
interval 0.701 -6.996**
(2.933) (3.126)
Constant 44.941*** 44.316*** 54.436*** 44.316*** 45.017*** 45.511***
(1.224) (2.001) (2.274) (2.001) (2.144) (2.223)
Observations 1,505 745 760 507 501 497
R2 0.010 0.0002 0.008 0.022 0.001 0.013
Adjusted R2 0.009 -0.002 0.005 0.020 -0.001 0.011
Residual Std. Error 34.441 33.445 35.337 34.076 33.956 35.205
F Statistic 14.768*** 0.081 2.966* 11.180*** 0.637 6.453**
Note: p<0.1; p<0.05; p<0.01